import plotly.express as px
import pandas as pd
# Shared styling used by every figure below: one colour for the
# non-depressed ("ok") group, one for the depressed ("risk") group, and a
# single Plotly template.
COL_OK = "#1E88E5"
COL_RISK = "#D81B60"
TEMPLATE = "presentation"

# Load the dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv("data/student_depression_dataset.csv")
def bump_fonts(fig, base=20):
    """Enlarge the fonts of a figure's title, axes and legend.

    Args:
        fig: Plotly figure (any object exposing ``update_layout``,
            ``update_xaxes`` and ``update_yaxes``). It is modified in place.
        base: Font size applied to general text, axis titles and the
            legend. The figure title is two units larger and the tick
            labels two units smaller.

    Returns:
        The same ``fig`` object, so further calls can be chained.
    """
    # Plotly rejects font sizes below 1, so a small ``base`` must not push
    # the tick labels to zero or a negative size.
    tick_size = max(base - 2, 1)

    fig.update_layout(
        font=dict(size=base),
        title_font=dict(size=base + 2),
        legend_font=dict(size=base),
    )
    for update_axes in (fig.update_xaxes, fig.update_yaxes):
        update_axes(title_font=dict(size=base), tickfont=dict(size=tick_size))
    return fig
# Gender composition of the dataset, drawn as a pie chart.
gender_counts = (
    df["Gender"]
    .value_counts()
    .rename_axis("Gender")
    .reset_index(name="count")
)

fig_gender_pie = px.pie(
    gender_counts,
    values="count",
    names="Gender",
    title="<b>Gender composition</b>",
    color_discrete_sequence=[COL_OK, COL_RISK],
    template=TEMPLATE,
    hole=0,
)
# Enlarge the fonts first, then print each slice's share inside the slice
# with one decimal.
bump_fonts(fig_gender_pie).update_traces(
    textposition="inside",
    texttemplate="%{percent:.1%}",
)
fig_gender_pie.show()
# Depression prevalence within each gender, as 100 % stacked bars.
gender_tab = (
    df.groupby(["Gender", "Depression"])
    .size()
    .reset_index(name="count")
)
gender_tab["DepressionLabel"] = gender_tab["Depression"].map({0: "No", 1: "Yes"})
# Percentages are computed within each gender, so every bar adds up to
# roughly 100 (up to the one-decimal rounding).
gender_tab["percent"] = (
    gender_tab.groupby("Gender")["count"]
    .transform(lambda x: x / x.sum() * 100)
    .round(1)
)

fig_gender_dep = px.bar(
    gender_tab,
    x="Gender",
    y="percent",
    color="DepressionLabel",
    color_discrete_map={"No": COL_OK, "Yes": COL_RISK},
    template=TEMPLATE,
    barmode="stack",
    text="percent",
    title="<b>Depression prevalence is similar across genders</b>",
    labels={"percent": "Percent (%)"},
)
fig_gender_dep.update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_gender_dep).update_layout(legend_title_text="", yaxis_range=[0, 100])
# Display the figure: it was previously built and styled but never shown.
fig_gender_dep.show()
# Age distribution of the students younger than 35.
# NOTE(review): the cut-off used to be 30 although the frame is named
# ``df_35`` and the age/depression chart filters on ``< 35``; it is aligned
# to 35 here so both age charts describe the same population. Confirm.
df_35 = df[df["Age"] < 35].copy()

fig_age = px.histogram(
    df_35,
    x="Age",
    nbins=20,
    color_discrete_sequence=[COL_OK],
    template=TEMPLATE,
    title="<b>Age distribution</b>",
    labels={"Age": "Age (years)"},
).update_layout(
    bargap=0.05,
    # The y-axis title is set explicitly: px.histogram names the computed
    # count axis itself, so a ``labels`` entry for "count" does not rename it.
    yaxis_title="Frequency",
)
bump_fonts(fig_age)
fig_age.show()
# Share of depressed vs. non-depressed students at each age below 35.
# The relabelling is done on a copy: rewriting ``df["Depression"]`` in place
# would break every later ``map({0: ..., 1: ...})`` on ``df`` and turn the
# column into NaN if this section were executed twice.
df_35 = df[df["Age"] < 35].copy()
df_35["Depression"] = df_35["Depression"].map({1: "Yes", 0: "No"})

# Ages are cast to strings so the x-axis is categorical; the explicit
# order keeps the categories sorted numerically.
df_35["age_str"] = df_35["Age"].astype(int).astype(str)
age_order = sorted(df_35["Age"].astype(int).unique())
age_order_str = [str(a) for a in age_order]

fig_age_pct = px.histogram(
    df_35,
    x="age_str",
    color="Depression",
    category_orders={"age_str": age_order_str},
    barmode="stack",
    barnorm="percent",
    # Same template as the other figures in this script.
    template=TEMPLATE,
    title="<b>Depression levels decline progressively with aging </b>",
    labels={"age_str": "Age", "percent": "Percent (%)"},
    color_discrete_map={"Yes": COL_RISK, "No": COL_OK},
)
# Values are percentages (barnorm="percent"), so show the unit in the text.
fig_age_pct.update_traces(texttemplate="%{y:.1f}%", textposition="inside")
bump_fonts(fig_age_pct).update_layout(
    yaxis_range=[0, 100],
    legend_title_text="",
    xaxis_title="Age",
    yaxis_title="Percent (%)",
)
fig_age_pct.show()
# Depression prevalence by dietary habits, as 100 % stacked bars.
# The CSV is read again so that ``Depression`` holds its original 0/1 codes.
df = pd.read_csv("data/student_depression_dataset.csv")

# Drop the rows whose dietary habit is "others" (case-insensitive).
keep_rows = df["Dietary Habits"].str.lower() != "others"
df_diet = df[keep_rows]

diet_dep = (
    df_diet.groupby(["Dietary Habits", "Depression"])
    .size()
    .reset_index(name="count")
)
diet_dep["DepLabel"] = diet_dep["Depression"].map(
    {0: "Not depressed", 1: "Depression"}
)

# Percentage of each depression status within its dietary-habit group.
group_totals = diet_dep.groupby("Dietary Habits")["count"].transform("sum")
diet_dep["percent"] = (diet_dep["count"] / group_totals * 100).round(1)

# Put the "Depression" rows first so that trace is drawn at the bottom of
# the stack, with "Not depressed" on top.
dep_order = ["Depression", "Not depressed"]
diet_dep = diet_dep.assign(
    DepLabel=pd.Categorical(diet_dep["DepLabel"], categories=dep_order, ordered=True)
).sort_values("DepLabel")

fig_diet_dep = px.bar(
    diet_dep,
    x="Dietary Habits",
    y="percent",
    text="percent",
    color="DepLabel",
    color_discrete_map={"Not depressed": COL_OK, "Depression": COL_RISK},
    barmode="stack",
    template=TEMPLATE,
    title="<b>Less-healthy diets show markedly higher depression prevalence</b>",
    labels={"Dietary Habits": "Diet quality", "percent": "Percent (%)"},
)
fig_diet_dep.update_traces(texttemplate="%{text:.1f}%")
fig_diet_dep = bump_fonts(fig_diet_dep)
fig_diet_dep.update_layout(
    legend_title_text="",
    xaxis_tickangle=0,
    yaxis_range=[0, 100],
)
fig_diet_dep.show()
# GRAPH F – Suicidal thoughts vs depression (%)
suic = (
    df.groupby(["Have you ever had suicidal thoughts ?", "Depression"])
    .size()
    .reset_index(name="count")
)
suic["DepressionLabel"] = suic["Depression"].map({0: "Not depressed", 1: "Depressed"})
# Percentage of each depression status within the yes/no answer group.
suic["percent"] = (
    suic.groupby("Have you ever had suicidal thoughts ?")["count"]
    .transform(lambda x: x / x.sum() * 100)
    .round(1)
)

fig_suic = px.bar(
    suic,
    x="Have you ever had suicidal thoughts ?",
    y="percent",
    color="DepressionLabel",
    color_discrete_map={"Not depressed": COL_OK, "Depressed": COL_RISK},
    template=TEMPLATE,
    text="percent",
    barmode="stack",
    title="<b>Suicidal thoughts → 4x depression</b>",
    labels={
        # Axis label spelling fixed (was "Sucidal thoughts").
        "Have you ever had suicidal thoughts ?": "Suicidal thoughts",
        "percent": "Percentage (%)",
    },
).update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_suic).update_layout(legend_title_text="", yaxis_range=[0, 100])
fig_suic.show()
# Financial stress vs depression (%)
fin = (
    df
    .groupby(["Financial Stress", "Depression"])
    .size()
    .reset_index(name="count")
)
fin["DepressionLabel"] = fin["Depression"].map({0: "No depression", 1: "Depressed"})
# Percentage of each depression status within its financial-stress level.
fin["percent"] = (
    fin
    .groupby("Financial Stress")["count"]
    .transform(lambda x: x / x.sum() * 100)
    .round(1)
)

fig_fin = px.bar(
    fin,
    x="Financial Stress",
    y="percent",
    color="DepressionLabel",
    # The order must use the exact labels produced by the mapping above; the
    # previous "No Depressed" matched no row, so the order was not applied.
    category_orders={"DepressionLabel": ["No depression", "Depressed"]},
    color_discrete_map={"Depressed": COL_RISK, "No depression": COL_OK},
    template=TEMPLATE,
    text="percent",
    barmode="stack",
    title="<b>Financial stress increases depression level</b>",
    labels={
        "Financial Stress": "Financial Stress (1-5)",
        "percent": "Percentage (%)",
    },
)
fig_fin.update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_fin).update_layout(
    legend_title_text="",
    yaxis_range=[0, 100],
)
fig_fin.show()
# Sleep duration vs depression (%).
# Maps the raw "Sleep Duration" values (the keys include literal single
# quotes) to display labels; rows with any other value are dropped.
mapping = {
    "'Less than 5 hours'": "Less than 5 hours",
    "'5-6 hours'": "5-6 hours",
    "'7-8 hours'": "7-8 Hours",
    "'More than 8 hours'": "More than 8 hours"
}
df_filtered = df[df["Sleep Duration"].isin(list(mapping))].copy()
df_filtered["SleepHoursCat"] = df_filtered["Sleep Duration"].map(mapping)

sleep = (
    df_filtered
    .groupby(["SleepHoursCat", "Depression"])
    .size()
    .reset_index(name="count")
)
sleep["DepressionLabel"] = sleep["Depression"].map({0: "No", 1: "Yes"})
# Percentage of each depression status within its sleep-duration category.
sleep["percent"] = (
    sleep
    .groupby("SleepHoursCat")["count"]
    .transform(lambda x: x / x.sum() * 100)
    .round(1)
)

fig_sleep_dep = px.bar(
    sleep,
    x="SleepHoursCat",
    y="percent",
    color="DepressionLabel",
    color_discrete_map={"No": COL_OK, "Yes": COL_RISK},
    template=TEMPLATE,
    barmode="stack",
    text="percent",
    title="<b>Less sleep → Higher depression prevalence</b>",
    labels={
        "SleepHoursCat": "Sleep duration",
        "percent": "Percent (%)"
    },
    category_orders={
        "SleepHoursCat": ["Less than 5 hours", "5-6 hours", "7-8 Hours", "More than 8 hours"],
        # Depressed first, using the real labels ("Yes"/"No"); the previous
        # "Depressed"/"Not Depressed" entries matched nothing and had no effect.
        "DepressionLabel": ["Yes", "No"]
    }
)
fig_sleep_dep.update_traces(texttemplate="%{text:.1f}%")
bump_fonts(fig_sleep_dep).update_layout(
    legend_title_text="",
    yaxis_range=[0, 100]
)
fig_sleep_dep.show()
# CGPA distribution at each academic-pressure level; only rows with a
# pressure value between 1 and 5 (inclusive) are plotted.
pressure_box = px.box(
    df[df["Academic Pressure"].between(1, 5)],
    x="Academic Pressure",
    y="CGPA",
    points="outliers",
    template=TEMPLATE,
    color_discrete_sequence=[COL_RISK],
    # Title grammar fixed: "affect" (verb), not "effect".
    title="<b>Higher academic pressure does not affect median CGPA</b>",
    labels={"Academic Pressure": "Pressure (1-5)", "CGPA": "CGPA"},
)
# Also mark the mean of each box, in addition to the median line.
pressure_box.update_traces(boxmean=True)
bump_fonts(pressure_box).update_layout(xaxis_range=[0.5, 5.5], yaxis_range=[3.9, 11.2])
pressure_box.show()
# Daily work/study hours at each academic-pressure level; only rows with a
# pressure value between 1 and 5 (inclusive) are plotted.
valid_pressure = df["Academic Pressure"].between(1, 5)

fig_hours_box = px.box(
    df[valid_pressure],
    x="Academic Pressure",
    y="Work/Study Hours",
    points="outliers",
    category_orders={"Academic Pressure": [1, 2, 3, 4, 5]},
    color_discrete_sequence=[COL_RISK],
    template=TEMPLATE,
    title="<b>Higher academic pressure is associated with longer study hours</b>",
    labels={"Academic Pressure": "Pressure (1-5)", "Work/Study Hours": "Hours per day"},
)
# Mark the mean of each box, enlarge the fonts, then pad the x-axis by
# half a unit on both sides of the 1-5 range.
fig_hours_box = bump_fonts(fig_hours_box.update_traces(boxmean=True))
fig_hours_box.update_layout(xaxis_range=[0.5, 5.5])
fig_hours_box.show()